using Distributions
using StatsPlots
using LaTeXStrings
using CSV
using DataFrames
using StatisticalRethinking
using Logging
using Turing
using MCMCChains
# setting default attributes for plots
default(legend=false)
Code 4.1
n = rand(Uniform(-1, 1), 1000, 16);
pos = sum.(eachrow(n));
density(pos)
Code 4.2
prod(1 .+ rand(Uniform(0, .1), 12))
1.69373415225129
Code 4.3
u = Uniform(0, .1)
growth = prod.(eachrow(1 .+ rand(u, 10000, 12)));
density(growth; label="density")
# overlay normal distribution
μ = mean(growth)
σ = std(growth)
plot!(Normal(μ, σ); label="Normal")
Code 4.4
big = prod.(eachrow(1 .+ rand(Uniform(0, 0.5), 10000, 12)));
small = prod.(eachrow(1 .+ rand(Uniform(0, 0.01), 10000, 12)));
density([big, small]; layout=(2, 1))
Code 4.5
density(log.(big))
Code 4.6
w = 6
n = 9
p_grid = range(0, 1; length=100)
bin_dens = [pdf(Binomial(n, p), w) for p in p_grid]
uni_dens = [pdf(Uniform(0, 1), p) for p in p_grid];
posterior = bin_dens .* uni_dens
posterior /= sum(posterior);
Code 4.7
d = DataFrame(CSV.File("data/Howell1.csv"));
Code 4.8
describe(d)
4 rows × 7 columns
| variable | mean | min | median | max | nmissing | eltype | |
|---|---|---|---|---|---|---|---|
| Symbol | Float64 | Real | Float64 | Real | Int64 | DataType | |
| 1 | height | 138.264 | 53.975 | 148.59 | 179.07 | 0 | Float64 |
| 2 | weight | 35.6106 | 4.25242 | 40.0578 | 62.9926 | 0 | Float64 |
| 3 | age | 29.3444 | 0.0 | 27.0 | 88.0 | 0 | Float64 |
| 4 | male | 0.472426 | 0 | 0.0 | 1 | 0 | Int64 |
Code 4.9
precis(d)
┌────────┬────────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├────────┼────────────────────────────────────────────────────────────┤ │ height │ 138.264 27.6024 81.1086 148.59 165.735 ▁▁▁▂▂▂▂▂▂██▆▁ │ │ weight │ 35.6106 14.7192 9.3607 40.0578 54.5029 ▁▃▄▄▃▂▃▆██▅▃▁ │ │ age │ 29.3444 20.7469 1.0 27.0 66.135 █▆▆▆▆▃▃▁▁ │ │ male │ 0.4724 0.4997 0.0 0.0 1.0 █▁▁▁▁▁▁▁▁▁█ │ └────────┴────────────────────────────────────────────────────────────┘
Code 4.10
d.height
544-element Vector{Float64}:
151.765
139.7
136.525
156.845
145.415
163.83
149.225
168.91
147.955
165.1
154.305
151.13
144.78
⋮
156.21
152.4
162.56
114.935
67.945
142.875
76.835
145.415
162.56
156.21
71.12
158.75
Code 4.11
d2 = d[d.age .>= 18,:];
Code 4.12
plot(Normal(178, 20); xlim=(100, 250))
Code 4.13
plot(Uniform(0, 50), xlim=(-10, 60), ylim=(0, 0.03))
Code 4.14
size = 10_000
sample_μ = rand(Normal(178, 20), size)
sample_σ = rand(Uniform(0, 50), size);
prior_h = [rand(Normal(μ, σ)) for (μ, σ) in zip(sample_μ, sample_σ)];
p1 = density(sample_μ; title="μ")
p2 = density(sample_σ; title="σ")
p3 = density(prior_h; title="prior_h")
plot(p1, p2, p3, layout=@layout [a b; c])
Code 4.15
sample_μ = rand(Normal(178, 100), size)
prior_h = [rand(Normal(μ, σ)) for (μ, σ) in zip(sample_μ, sample_σ)];
density(prior_h)
vline!([0, 272])
Code 4.16
μ_list = range(150, 160; length=100)
σ_list = range(7, 9; length=100)
log_likelihood = [
sum(logpdf(Normal(μ, σ), d2.height))
for μ ∈ μ_list, σ ∈ σ_list
]
log_prod = log_likelihood .+ [
logpdf(Normal(178, 20), μ) + logpdf(Uniform(0, 50), σ)
for μ ∈ μ_list, σ ∈ σ_list
];
max_prod = maximum(log_prod)
prob = @. exp(log_prod - max_prod);
Code 4.17
# note the transposition, that's due to Julia matrix order
contour(μ_list, σ_list, prob')
Code 4.18
heatmap(μ_list, σ_list, prob')
Code 4.19
indices = collect(Iterators.product(1:length(μ_list), 1:length(σ_list)));
sample_idx = wsample(vec(indices), vec(prob), 10_000; replace=true)
sample_μ = μ_list[first.(sample_idx)]
sample_σ = σ_list[last.(sample_idx)];
Code 4.20
scatter(sample_μ, sample_σ; alpha=0.1)
Code 4.21
p1 = density(sample_μ)
p2 = density(sample_σ)
plot(p1, p2, layout=(2,1))
Code 4.22
println(hpdi(sample_μ, alpha=0.11))
println(hpdi(sample_σ, alpha=0.11))
[153.83838383838383, 155.15151515151516] [7.262626262626263, 8.191919191919192]
Code 4.23
d3 = sample(d2.height, 20);
Code 4.24
μ_list = range(150, 170; length=200)
σ_list = range(4, 20; length=200)
log_likelihood = [
sum(logpdf(Normal(μ, σ), d3))
for μ ∈ μ_list, σ ∈ σ_list
]
log_prod = log_likelihood .+ [
logpdf(Normal(178, 20), μ) + logpdf(Uniform(0, 50), σ)
for μ ∈ μ_list, σ ∈ σ_list
]
max_prod = maximum(log_prod)
prob2 = @. exp(log_prod - max_prod)
indices = collect(Iterators.product(1:length(μ_list), 1:length(σ_list)));
sample2_idx = wsample(vec(indices), vec(prob2), 10_000; replace=true)
sample2_μ = μ_list[first.(sample2_idx)]
sample2_σ = σ_list[last.(sample2_idx)]
scatter(sample2_μ, sample2_σ; alpha=0.1)
Code 4.25
density(sample2_σ)
μ = mean(sample2_σ)
σ = std(sample2_σ)
plot!(Normal(μ, σ); label="Normal")
Code 4.26
d = DataFrame(CSV.File("data/Howell1.csv"));
d2 = d[d.age .>= 18,:];
Code 4.27
@model function model_height(height)
μ ~ Normal(178, 20)
σ ~ Uniform(0, 50)
height ~ Normal(μ, σ)
end
model_height (generic function with 1 method)
Code 4.28
Logging.disable_logging(Logging.Warn)
m4_1 = sample(model_height(d2.height), NUTS(), 1000)
print(MCMCChains.header(m4_1))
Iterations = 501:1:1500 Number of chains = 1 Samples per chain = 1000 Wall duration = 7.31 seconds Compute duration = 7.31 seconds parameters = μ, σ internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size
Code 4.29
display.(describe(m4_1; q=[0.055, 0.945]));
Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ μ 154.6156 0.4217 0.0133 0.0132 1001.0129 0.9996 ⋯ σ 7.7756 0.2823 0.0089 0.0066 921.4152 0.9990 ⋯ 1 column omitted
Quantiles parameters 5.5% 94.5% Symbol Float64 Float64 μ 153.9495 155.3158 σ 7.3354 8.2462
Code 4.30
init_vals = [mean(d2.height), std(d2.height)]
chain = sample(model_height(d2.height), NUTS(), 1000, init_theta=init_vals)
Chains MCMC chain (1000×14×1 Array{Float64, 3}):
Iterations = 501:1:1500
Number of chains = 1
Samples per chain = 1000
Wall duration = 0.64 seconds
Compute duration = 0.64 seconds
parameters = μ, σ
internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rhat ⋯
Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯
μ 154.5995 0.4242 0.0134 0.0072 1012.5709 0.9992 ⋯
σ 7.7647 0.2966 0.0094 0.0067 1286.1184 0.9993 ⋯
1 column omitted
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
μ 153.7784 154.3115 154.6048 154.8842 155.4313
σ 7.2046 7.5549 7.7559 7.9824 8.3348
Code 4.31
@model function model_height(height)
μ ~ Normal(178, 0.1)
σ ~ Uniform(0, 50)
height ~ Normal(μ, σ)
end
m4_2 = sample(model_height(d2.height), NUTS(), 1000)
display.(describe(m4_2; q=[0.055, 0.945]));
Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ μ 177.8666 0.0965 0.0031 0.0033 827.0873 1.0025 ⋯ σ 24.5920 0.9806 0.0310 0.0198 732.2670 0.9992 ⋯ 1 column omitted
Quantiles parameters 5.5% 94.5% Symbol Float64 Float64 μ 177.7154 178.0158 σ 23.0851 26.2285
Code 4.32
cov(hcat(m4_1[:μ], m4_1[:σ]))
2×2 Matrix{Float64}:
0.177812 -0.00351275
-0.00351275 0.0796725
Code 4.33
c = cov(hcat(m4_1[:μ], m4_1[:σ]))
cov2cor(c, diag(c))
2×2 Matrix{Float64}:
1.0 -0.247958
-0.247958 1.0
Code 4.34
# resetrange is needed due to bug in MCMCChains: https://github.com/TuringLang/MCMCChains.jl/issues/324
# once it will be fixed, direct sampling from the chain will be possible
samp_chain = sample(resetrange(m4_1), 10_000)
samp_df = DataFrame(samp_chain)
first(samp_df, 5)
5 rows × 2 columns
| μ | σ | |
|---|---|---|
| Float64 | Float64 | |
| 1 | 154.53 | 8.13625 |
| 2 | 154.121 | 7.80365 |
| 3 | 154.524 | 7.53228 |
| 4 | 154.361 | 7.79269 |
| 5 | 154.456 | 8.83125 |
Code 4.35
precis(samp_df)
┌───────┬───────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼───────────────────────────────────────────────────────────┤ │ μ │ 154.614 0.4141 153.957 154.626 155.296 ▁▁▂▄▆██▆▄▂▂▁▁ │ │ σ │ 7.7817 0.2852 7.3322 7.7671 8.2557 ▁▂▆██▄▂▁▁▁ │ └───────┴───────────────────────────────────────────────────────────┘
Code 4.36
data = hcat(m4_1[:μ], m4_1[:σ])
μ = mean(data, dims=1)
σ = cov(data)
mvn = MvNormal(vec(μ), σ)
post = rand(mvn, 10_000);
print(mvn)
FullNormal( dim: 2 μ: [154.6155654910577, 7.775624515903448] Σ: [0.17781193597271996 -0.003512747124669828; -0.003512747124669828 0.07967253638753385] )
Code 4.37
d = DataFrame(CSV.File("data/Howell1.csv"));
d2 = d[d.age .>= 18,:];
# fancy way of doing scatter(d2.weight, d2.height)
@df d2 scatter(:weight, :height)
Code 4.38
Random.seed!(2971)
N = 100
a = rand(Normal(178, 20), N)
b = rand(Normal(0, 10), N);
Code 4.39
p = hline([0, 272]; ylims=(-100, 400), xlabel="weight", ylabel="hegiht")
title!(L"\beta \sim \mathcal{N}(\mu=0,\sigma=10)")
x_mean = mean(d2.weight)
xlims = extrema(d2.weight) # getting min and max in one pass
for (α, β) ∈ zip(a, b)
plot!(x -> α + β * (x - x_mean); xlims=xlims, c=:black, alpha=0.3)
end
display(p)
Code 4.40
b = rand(LogNormal(0, 1), 10_000)
density(b, xlims=(0, 5), bandwidth=0.1)
Code 4.41
Random.seed!(2971)
N = 100
a = rand(Normal(178, 20), N)
b = rand(LogNormal(0, 1), N);
Code 4.42
d = DataFrame(CSV.File("data/Howell1.csv"));
d2 = d[d.age .>= 18,:]
xbar = mean(d2.weight)
@model function height_regr_model(weight, height)
a ~ Normal(178, 20)
b ~ LogNormal(0, 1)
μ = @. a + b * (weight - xbar)
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_3 = sample(height_regr_model(d2.weight, d2.height), NUTS(), 1000)
m4_3 = resetrange(m4_3);
Code 4.43
@model function height_regr_model_exp(weight, height)
a ~ Normal(178, 20)
log_b ~ Normal(0, 1)
μ = @. a + exp(log_b) * (weight - xbar)
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_3b = sample(height_regr_model_exp(d2.weight, d2.height), NUTS(), 1000);
Code 4.44
m4_3_df = DataFrame(m4_3)
precis(m4_3_df)
┌───────┬────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼────────────────────────────────────────────────────────┤ │ a │ 154.606 0.2625 154.175 154.616 155.014 ▁▁▂▅██▅▂▁▁ │ │ b │ 0.9014 0.0423 0.835 0.9021 0.9665 ▁▃██▃▁ │ │ σ │ 5.1071 0.1978 4.8019 5.0921 5.4553 ▁▁▅█▅▂▁ │ └───────┴────────────────────────────────────────────────────────┘
Code 4.45
round.(cov(Matrix(m4_3_df)), digits=3)
3×3 Matrix{Float64}:
0.069 -0.0 -0.001
-0.0 0.002 -0.0
-0.001 -0.0 0.039
Code 4.46
p = @df d2 scatter(:weight, :height; alpha=0.3)
chain = resetrange(m4_3)
samples = sample(chain, 1000)
a_map = mean(samples[:a])
b_map = mean(samples[:b])
plot!(x -> a_map + b_map*(x-xbar))
Code 4.47
post = sample(m4_3, 1000)
post_df = DataFrame(post)
post_df[1:5,:]
5 rows × 3 columns
| a | b | σ | |
|---|---|---|---|
| Float64 | Float64 | Float64 | |
| 1 | 155.241 | 0.939497 | 5.32578 |
| 2 | 154.435 | 0.890103 | 5.15068 |
| 3 | 154.393 | 0.95766 | 5.12624 |
| 4 | 154.644 | 0.975131 | 5.02818 |
| 5 | 154.719 | 0.833828 | 5.12795 |
Code 4.48
N = 10
dN = d2[1:N,:]
@model function height_regr_model_N(weight, height)
a ~ Normal(178, 20)
b ~ LogNormal(0, 1)
m_weight = mean(weight)
μ = @. a + b * (weight - m_weight)
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
mN = sample(height_regr_model(dN.weight, dN.height), NUTS(), 1000)
mN = resetrange(mN);
Code 4.49
post = sample(mN, 20)
post_df = DataFrame(post);
xlims = extrema(d2.weight)
ylims = extrema(d2.height)
p = @df dN scatter(:weight, :height; xlims=xlims, ylims=ylims)
title!("N = $N"; xlab="weight", ylab="height")
x_mean = mean(dN.weight)
for (a, b) ∈ zip(post_df.a, post_df.b)
plot!(x -> a + b * (x-x_mean); c="black", alpha=0.3)
end
display(p)
Code 4.50
post = sample(m4_3, 1000)
post_df = DataFrame(post)
μ_at_50 = @. post_df.a + post_df.b * (50 - xbar);
Code 4.51
density(μ_at_50; lw=2, xlab=L"\mu|weight=50")
Code 4.52
percentile(μ_at_50, [5.5, 94.5])
2-element Vector{Float64}:
158.5792439935818
159.65866731906047
Code 4.53
μ = StatisticalRethinking.link(post_df, [:a :b], d2.weight, xbar);
μ = hcat(μ...);
Base.size(μ), μ[1:5,1]
((1000, 352), [157.56293534189655, 157.70353697789378, 156.97988672219105, 156.85965609569527, 157.2632797224988])
Code 4.54
weight_seq = 25:70
μ = StatisticalRethinking.link(post_df, [:a :b], weight_seq, xbar);
μ = hcat(μ...);
Base.size(μ), μ[1:5,1]
((1000, 46), [137.04072934028633, 136.64103095564798, 135.80904017330352, 136.73820023564727, 137.81265964505803])
Code 4.55
p = plot()
for i in 1:100
scatter!(weight_seq, μ[i,:]; c=:blue, alpha=0.2)
end
display(p)
Code 4.56
μ_mean = mean.(eachcol(μ))
# small trick here: we wrap second argument into tuple to broadcast it to every percentile application
# the same could be done with: map(x -> percentile(x, [5.5, 94.5]), eachcol(μ))
μ_PI = percentile.(eachcol(μ), ([5.5, 94.5],))
μ_PI = vcat(μ_PI'...);
Code 4.57
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
Code 4.58
post = sample(m4_3, 1000)
post = DataFrame(post)
weight_seq = 25:70
μ = map(w -> post.a + post.b * (w - xbar), weight_seq)
μ = hcat(μ...)
μ_mean = mean.(eachcol(μ))
μ_CI = percentile.(eachcol(μ), ([5.5, 94.5],));
Code 4.59
sim_height = simulate(post, [:a, :b, :σ], weight_seq .- xbar);
Base.size(sim_height), sim_height[1:5,1]
((1000, 46), [131.0600330744535, 131.07613382456364, 144.0054107126664, 133.0809884065339, 139.1993923582497])
Code 4.60
height_PI = percentile.(eachcol(sim_height), ([5.5, 94.5],))
height_PI = vcat(height_PI'...);
Code 4.61
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.62
post = sample(m4_3, 10_000)
post = DataFrame(post)
sim_height = simulate(post, [:a, :b, :σ], weight_seq .- xbar)
height_PI = percentile.(eachcol(sim_height), ([5.5, 94.5],))
height_PI = vcat(height_PI'...);
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.63
post = sample(m4_3, 1000)
post = DataFrame(post)
sim_height = [
[
rand(Normal(a + b * (w - xbar), σ))
for (a, b, σ) ∈ zip(post.a, post.b, post.σ)
]
for w ∈ weight_seq
]
sim_height = hcat(sim_height...)
height_PI = percentile.(eachcol(sim_height), ([5.5, 94.5],));
height_PI = vcat(height_PI'...);
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.64
d = DataFrame(CSV.File("data/Howell1.csv"))
scatter(d.weight, d.height; alpha=0.3)
Code 4.65
d[!, :weight_s] = standardize(ZScoreTransform, d.weight)
d[!, :weight_s2] = d.weight_s.^2;
@model function height_regr_m2(weight_s, weight_s2, height)
a ~ Normal(178, 20)
b1 ~ LogNormal(0, 1)
b2 ~ Normal(0, 1)
μ = @. a + b1 * weight_s + b2 * weight_s2
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_5 = sample(height_regr_m2(d.weight_s, d.weight_s2, d.height), NUTS(), 1000)
m4_5 = resetrange(m4_5)
m4_5_df = DataFrame(m4_5);
Code 4.66
precis(m4_5_df)
┌───────┬───────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼───────────────────────────────────────────────────────────┤ │ a │ 146.054 0.3497 145.501 146.059 146.576 ▁▁▂▃▅██▇▄▂▁▁▁ │ │ b1 │ 21.7228 0.2941 21.2498 21.7259 22.1891 ▁▁▁▄▇█▇▄▂▁▁ │ │ b2 │ -7.8033 0.2589 -8.1943 -7.8236 -7.3754 ▁▁▂▄█▆▅▂▁ │ │ σ │ 5.8137 0.179 5.5276 5.8055 6.1068 ▁▁▁▃▆██▆▄▂▁▁ │ └───────┴───────────────────────────────────────────────────────────┘
Code 4.67
Random.seed!(1)
df = sample(m4_5_df, 1000)
weight_seq = range(-2.2, 2; length=30)
# explicit logic of link
mu = [
df.a + df.b1 * w_s + df.b2 * w_s^2
for w_s ∈ weight_seq
]
mu = hcat(mu...)
mu_mean = mean.(eachcol(mu))
mu_PI = percentile.(eachcol(mu), ([5.5, 94.5],))
mu_PI = vcat(mu_PI'...)
# explicit logic of sim
sim_height = [
[
rand(Normal(row.a + row.b1 * w_s + row.b2 * w_s^2, row.σ))
for row ∈ eachrow(df)
]
for w_s ∈ weight_seq
]
sim_height = hcat(sim_height...);
height_PI = percentile.(eachcol(sim_height), ([5.5, 94.5],))
height_PI = vcat(height_PI'...);
Code 4.68
p_square = @df d scatter(:weight_s, :height; alpha=0.3, title="Square poly")
plot!(weight_seq, mu_mean; c=:black)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=mu_PI, fillalpha=0.3)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.69
d[!, :weight_s3] = d.weight_s.^3;
@model function height_regr_m3(weight_s, weight_s2, weight_s3, height)
a ~ Normal(178, 20)
b1 ~ LogNormal(0, 1)
b2 ~ Normal(0, 1)
b3 ~ Normal(0, 1)
μ = @. a + b1 * weight_s + b2 * weight_s2 + b3 * weight_s3
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_6 = sample(height_regr_m3(d.weight_s, d.weight_s2, d.weight_s3, d.height), NUTS(), 1000)
m4_6 = resetrange(m4_6)
m4_6_df = DataFrame(m4_6)
precis(m4_6_df)
┌───────┬───────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼───────────────────────────────────────────────────────────┤ │ a │ 146.399 0.3158 145.888 146.405 146.898 ▁▁▁▃▅██▆▃▁▁ │ │ b1 │ 15.2346 0.4765 14.4594 15.2243 15.9857 ▁▁▁▄█▅▁▁ │ │ b2 │ -6.213 0.2561 -6.6352 -6.2127 -5.8004 ▁▁▂▅██▄▂▁▁ │ │ b3 │ 3.5764 0.2239 3.2084 3.584 3.9364 ▁▂▄██▃▁▁ │ │ σ │ 4.8605 0.147 4.6456 4.8546 5.0868 ▁▁▁▄▇█▇▄▂▁▁▁▁ │ └───────┴───────────────────────────────────────────────────────────┘
Random.seed!(1)
df = sample(m4_6_df, 1000)
weight_seq = range(-2.2, 2; length=30)
# explicit logic of link
mu = [
df.a + df.b1 * w_s + df.b2 * w_s^2 + df.b3 * w_s^3
for w_s ∈ weight_seq
]
mu = hcat(mu...)
mu_mean = mean.(eachcol(mu))
mu_PI = percentile.(eachcol(mu), ([5.5, 94.5],))
mu_PI = vcat(mu_PI'...)
# explicit logic of sim
sim_height = [
[
rand(Normal(row.a + row.b1 * w_s + row.b2 * w_s^2 + row.b3 * w_s^3, row.σ))
for row ∈ eachrow(df)
]
for w_s ∈ weight_seq
]
sim_height = hcat(sim_height...);
height_PI = percentile.(eachcol(sim_height), ([5.5, 94.5],))
height_PI = vcat(height_PI'...);
p_cubic = @df d scatter(:weight_s, :height; alpha=0.3, title="Cubic poly")
plot!(weight_seq, mu_mean; c=:black)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=mu_PI, fillalpha=0.3)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
plot(p_square, p_cubic; layout=(1, 2))
Code 4.70 and 4.71
Looks like Julia plots don't support change of ticks proposed in the book. But much more natural way will be to remap values we're plotting back to the original scale. Example of this is below.
μ = mean(d.weight)
σ = std(d.weight)
w = @. d.weight_s * σ + μ
scatter(w, d.height; alpha=0.3)
w_s = @. weight_seq * σ + μ
plot!(w_s, mu_mean; c=:black)
plot!(w_s, [mu_mean mu_mean]; c=:black, fillrange=mu_PI, fillalpha=0.3)
plot!(w_s, [mu_mean mu_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.72
d = DataFrame(CSV.File("data/cherry_blossoms.csv", missingstring="NA"))
precis(d)
┌────────────┬─────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├────────────┼─────────────────────────────────────────────────────────┤ │ year │ 1408.0 350.885 867.77 1408.0 1948.23 ████████████▂ │ │ doy │ 104.54 6.407 94.43 105.0 115.0 ▁▂▆██▅▂▁ │ │ temp │ 6.1419 0.6636 5.15 6.1 7.2947 ▁▄▆█▄▂▁▁ │ │ temp_upper │ 7.1852 0.9929 5.8977 7.04 8.9023 ▂██▃▁▁▁▁ │ │ temp_lower │ 5.0989 0.8503 3.7876 5.145 6.37 ▁▁▁▂▇█▂▁ │ └────────────┴─────────────────────────────────────────────────────────┘
Code 4.73
d2 = d[completecases(d[!,[:doy]]),:]
num_knots = 15
knots_list = quantile(d2.year, range(0, 1; length=num_knots));
Code 4.74
using BSplines
basis = BSplineBasis(3, knots_list)
16-element BSplineBasis{Vector{Float64}}:
order: 3
breakpoints: [812.0, 1036.0, 1174.0, 1269.0, 1377.0, 1454.0, 1518.0, 1583.0, 1650.0, 1714.0, 1774.0, 1833.0, 1893.0, 1956.0, 2015.0]
Code 4.75
plot(basis)
scatter!(knots_list, repeat([1], num_knots); xlab="year", ylab="basis")